# -*- coding: utf-8 -*-
"""

Storing date-times for start and end of interpolated portion of interpolated tracks

@author: rklotz
"""

import pandas as pd
#import numpy as np
import geopandas as gpd
#from datetime import datetime
import os


###############################################################################

# Inputs: geodatabase holding the merged track lines, the per-year layer
# name template (filled in with the year), and the years to process
tracks_gdb = r"H:\My Drive\Boats\ReplicationCode\data\AIS\lines_v2\merged_lines.gdb"
layer_tag = r"cleaned_v2_%s"
years = list(range(2009, 2017))  # 2009 through 2016 inclusive

# Output: CSV that receives the interpolation start/end times
interp_times_out = r"H:\My Drive\Boats\ReplicationCode\data\AIS\interp_times\interp_times.csv"

# Create the output folder when it is not there yet
out_dir = os.path.dirname(interp_times_out)
if not os.path.exists(out_dir):
    os.makedirs(out_dir)


# For every year, build one row per interpolated gap: the times bounding the
# gap plus identifiers of the interpolated track it belongs to. The per-year
# tables are collected in df_list.
df_list = []
for year in years:
    print(year)

    gdf_tracks = gpd.GeoDataFrame.from_file(tracks_gdb, layer=layer_tag % year)
    # 1-based row number serves as the track id. The ids listed in
    # INTERP_TRACKS are matched against it below -- presumably they were
    # written with the same numbering (TODO confirm against the upstream step).
    gdf_tracks['trackID_wInterp'] = gdf_tracks.index + 1

    # get interpolated tracks
    # explode out track ids of tracks used for interpolation
    df_tmp = gdf_tracks.loc[
        gdf_tracks.INTERP_FLAG == 1, ['trackID_wInterp', 'INTERP_TRACKS']
    ].drop_duplicates()
    # split the comma-separated id list and stack it: one row per
    # (interpolated track, component track) pair
    df_tmp = (
        df_tmp.set_index(['trackID_wInterp'])
        .apply(lambda x: x.str.split(',').explode())
        .reset_index()
    )
    # integer ids so that the merge keys match and the sort below is numeric
    df_tmp.INTERP_TRACKS = df_tmp.INTERP_TRACKS.astype(int)
    # from here on: INTERP_ID = interpolated track, trackID_wInterp = component track
    df_tmp = df_tmp.rename(columns={'INTERP_TRACKS': 'trackID_wInterp', 'trackID_wInterp': 'INTERP_ID'})
    # attach each component track's own start and end time
    df_tmp = df_tmp.merge(
        gdf_tracks[['trackID_wInterp', 'TrackStartTime', 'TrackEndTime']],
        on='trackID_wInterp',
        how='left',
    )

    # sort
    # sort_values returns a new frame; the result has to be assigned back,
    # otherwise the order-dependent shift()/cumcount() below run on unsorted rows
    df_tmp = df_tmp.sort_values(['INTERP_ID', 'trackID_wInterp'], ascending=True)

    # construct start and ending times of interpolated portions
    # a gap starts where the previous component track (within the same
    # interpolated track) ends ...
    df_tmp['InterpStartTime'] = df_tmp.groupby('INTERP_ID')['TrackEndTime'].shift()
    # ... and ends where the current component track starts
    df_tmp = df_tmp.rename(columns={'TrackStartTime': 'InterpEndTime'})
    # the first component of each interpolated track has no preceding gap
    df_tmp = df_tmp.loc[df_tmp["InterpStartTime"].notna()]
    # running number of the gap within its interpolated track, starting at 1
    df_tmp['interp_num'] = df_tmp.groupby('INTERP_ID').cumcount() + 1

    # now merge back in interpolated track identifiers
    df_tmp = df_tmp.rename(columns={'INTERP_ID': 'trackID_wInterp', 'trackID_wInterp': 'INTERP_TRACK'})
    df_tmp = df_tmp.merge(
        gdf_tracks[['trackID_wInterp', 'TrackStartTime', 'MMSI', 'INTERP_FLAG']],
        on='trackID_wInterp',
        how='left',
    )
    # the row-number id and the component track's end time are not written out
    df_tmp = df_tmp.drop(columns=['trackID_wInterp', 'TrackEndTime'])

    # add to list
    df_list.append(df_tmp)

# Stack the per-year tables into a single table and write it to the output CSV
print("Saving")
df_all = pd.concat(
    df_list,
    sort=True,
    ignore_index=True,
)
df_all.to_csv(
    interp_times_out,
    header=True,
    index=False,
    sep=',',
)

